Wang Haihua
🍈 🍉🍊 🍋 🍌
试利用太阳黑子个数文件 sunspots.csv, 建立适 当的 ARMA 模型, 并预测 1989 年太阳黑子个数。 解 可以使用 statsmodels.api.tsa.ARMA()函数来拟合 ARMA 模型, 下面先初步地使用 ARMA $(9,1)$ 模型来拟合数据。得到 1989 年太阳黑子预测 值为 141 个。
代码
import pandas as pd, numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
plt.rc('font',family='SimHei'); plt.rc('font',size=16)
d=pd.read_csv('sunspots.csv',usecols=['counts'])
md=sm.tsa.ARMA(d,(9,1)).fit()
years=np.arange(1700,1989) #已知观测值的年代
dhat=md.predict()
plt.plot(years[-20:],d.values[-20:],'o-k')
plt.plot(years[-20:],dhat.values[-20:],'P--')
plt.legend(('原始观测值','预测值')); plt.show()
dnext=md.predict(d.shape[0],d.shape[0])
print(dnext) #显示下一期的预测值
下面给出一个完整的建模步骤。 第一步:画出原始数据的折线图如图 $18.2$ 所示, 初步确定观测数据是 平稳的。画出序列的自相关图和偏相关图如图 。
第二步: 利用 AIC 和 BIC 准则, 确定选择 ARMA(4,2), 利用 Python 软 件,求得模型的计算结果,残差取值及分布 。 第三步:利用得到的模型, 得到 1989 年太阳黑子预测值为 139 个。原始数据及其预测值对比。
代码
import pandas as pd, numpy as np
import statsmodels.api as sm
import matplotlib.pylab as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
plt.rc('axes',unicode_minus=False)
plt.rc('font',family='SimHei'); plt.rc('font',size=16)
d=pd.read_csv('sunspots.csv'); dd=d['counts']
years=d['year'].values.astype(int)
plt.plot(years,dd.values,'-*'); plt.figure()
ax1=plt.subplot(121); plot_acf(dd,ax=ax1,title='自相关')
ax2=plt.subplot(122); plot_pacf(dd,ax=ax2,title='偏自相关')
for i in range(1,6):
for j in range(1,6):
md=sm.tsa.ARMA(dd,(i,j)).fit()
print([i,j,md.aic,md.bic])
zmd=sm.tsa.ARMA(dd,(4,2)).fit()
print(zmd.summary()) #显示模型的所有信息
residuals = pd.DataFrame(zmd.resid)
fig, ax = plt.subplots(1,2)
residuals.plot(title="残差", ax=ax[0])
residuals.plot(kind='kde', title='密度', ax=ax[1])
plt.legend(''); plt.ylabel('')
dhat=zmd.predict(); plt.figure()
plt.plot(years[-20:],dd.values[-20:],'o-k')
plt.plot(years[-20:],dhat.values[-20:],'P--')
plt.legend(('原始观测值','预测值'))
dnext=zmd.predict(d.shape[0],d.shape[0])
print(dnext) #显示下一期的预测值
plt.show()
import pandas as pd, numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
plt.rc('font',family='SimHei'); plt.rc('font',size=16)
d=pd.read_csv('data/sunspots.csv',usecols=['counts'])
md=sm.tsa.ARMA(d,(9,1)).fit()
years=np.arange(1700,1989) #已知观测值的年代
dhat=md.predict()
plt.plot(years[-20:],d.values[-20:],'o-k')
plt.plot(years[-20:],dhat.values[-20:],'P--')
plt.legend(('原始观测值','预测值'));
plt.savefig('images/pre2301.png')
plt.show()
dnext=md.predict(d.shape[0],d.shape[0])
print(dnext) #显示下一期的预测值
C:\Users\reformship\AppData\Roaming\Python\Python38\site-packages\statsmodels\tsa\arima_model.py:472: FutureWarning: statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the . between arima and model) and statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release. statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and is both well tested and maintained. To silence this warning and continue using ARMA and ARIMA until they are removed, use: import warnings warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA', FutureWarning) warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA', FutureWarning) warnings.warn(ARIMA_DEPRECATION_WARN, FutureWarning)
289 141.006818 dtype: float64
import pandas as pd, numpy as np
import statsmodels.api as sm
import matplotlib.pylab as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
plt.rc('axes',unicode_minus=False)
plt.rc('font',family='SimHei'); plt.rc('font',size=16)
d=pd.read_csv('data/sunspots.csv'); dd=d['counts']
years=d['year'].values.astype(int)
plt.plot(years,dd.values,'-*'); plt.figure()
ax1=plt.subplot(121); plot_acf(dd,ax=ax1,title='自相关')
ax2=plt.subplot(122); plot_pacf(dd,ax=ax2,title='偏自相关')
for i in range(1,6):
for j in range(1,6):
md=sm.tsa.ARMA(dd,(i,j)).fit()
print([i,j,md.aic,md.bic])
zmd=sm.tsa.ARMA(dd,(4,2)).fit()
print(zmd.summary()) #显示模型的所有信息
residuals = pd.DataFrame(zmd.resid)
fig, ax = plt.subplots(1,2)
residuals.plot(title="残差", ax=ax[0])
residuals.plot(kind='kde', title='密度', ax=ax[1])
plt.legend(''); plt.ylabel('')
dhat=zmd.predict(); plt.figure()
plt.plot(years[-20:],dd.values[-20:],'o-k')
plt.plot(years[-20:],dhat.values[-20:],'P--')
plt.legend(('原始观测值','预测值'))
dnext=zmd.predict(d.shape[0],d.shape[0])
print(dnext) #显示下一期的预测值
plt.show()
C:\Users\reformship\AppData\Roaming\Python\Python38\site-packages\statsmodels\tsa\arima_model.py:472: FutureWarning: statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the . between arima and model) and statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release. statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and is both well tested and maintained. To silence this warning and continue using ARMA and ARIMA until they are removed, use: import warnings warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA', FutureWarning) warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA', FutureWarning) warnings.warn(ARIMA_DEPRECATION_WARN, FutureWarning)
[1, 1, 2534.4114405386576, 2549.0771472911074] [1, 2, 2486.3548625561575, 2504.68699599672] [1, 3, 2481.638175466555, 2503.6367355952293] [1, 4, 2501.8107719344875, 2527.4757587512745] [1, 5, 2479.017097290776, 2508.3485107956753] [2, 1, 2451.5373784047038, 2469.869511845266] [2, 2, 2452.426385218229, 2474.4249453469033] [2, 3, 2454.3953817032966, 2480.0603685200836] [2, 4, 2436.7575585908367, 2466.088972095736] [2, 5, 2435.389800609163, 2468.387640802175] [3, 1, 2448.3675882072525, 2470.366148335927] [3, 2, 2417.7962648857724, 2443.4612517025594] [3, 3, 2411.6547554543745, 2440.986168959274] [3, 4, 2411.7420967010135, 2444.7399368940255] [3, 5, 2408.3966704994027, 2445.060937380527] [4, 1, 2435.9273833250854, 2461.5923701418724] [4, 2, 2411.3526698578394, 2440.684083362739] [4, 3, 2413.131513155178, 2446.12935334819] [4, 4, 2410.7025696059654, 2447.3668364870896] [4, 5, 2410.296066397891, 2450.626759967128] [5, 1, 2434.7775503510134, 2464.108963855913] [5, 2, 2412.797353010159, 2445.795193203171] [5, 3, 2414.3857764130876, 2451.0500432942117] [5, 4, 2411.035582230158, 2451.3662757993948] [5, 5, 2412.2825059215984, 2456.2796261789476] ARMA Model Results ============================================================================== Dep. Variable: counts No. Observations: 289 Model: ARMA(4, 2) Log Likelihood -1197.676 Method: css-mle S.D. of innovations 15.159 Date: Fri, 20 May 2022 AIC 2411.353 Time: 23:42:56 BIC 2440.684 Sample: 0 HQIC 2423.106 ================================================================================ coef std err z P>|z| [0.025 0.975] -------------------------------------------------------------------------------- const 49.7381 6.211 8.008 0.000 37.565 61.911 ar.L1.counts 2.8101 0.086 32.694 0.000 2.642 2.979 ar.L2.counts -3.1178 0.218 -14.294 0.000 -3.545 -2.690 ar.L3.counts 1.5248 0.213 7.165 0.000 1.108 1.942 ar.L4.counts -0.2366 0.080 -2.954 0.003 -0.394 -0.080 ma.L1.counts -1.6480 0.057 -29.016 0.000 -1.759 -1.537 ma.L2.counts 0.7885 0.055 14.396 0.000 0.681 0.896 Roots ============================================================================= Real Imaginary Modulus Frequency ----------------------------------------------------------------------------- AR.1 0.8639 -0.5664j 1.0330 -0.0924 AR.2 0.8639 +0.5664j 1.0330 0.0924 AR.3 1.0927 -0.0000j 1.0927 -0.0000 AR.4 3.6250 -0.0000j 3.6250 -0.0000 MA.1 1.0450 -0.4197j 1.1262 -0.0608 MA.2 1.0450 +0.4197j 1.1262 0.0608 ----------------------------------------------------------------------------- 289 139.440812 dtype: float64